Using Standard Methods
set.seed(123)
library(mgcv)
dat = gamSim(1, n=400, dist="normal", scale=1, verbose=F)
mod = lm(y ~ x1 + x2, data=dat)
summary(mod)
Call:
lm(formula = y ~ x1 + x2, data = dat)
Residuals:
Min 1Q Median 3Q Max
-8.6634 -1.3220 -0.0571 1.5297 7.0937
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 7.2745 0.3332 21.83 <2e-16 ***
x1 6.3616 0.4352 14.62 <2e-16 ***
x2 -5.1214 0.4526 -11.31 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 2.532 on 397 degrees of freedom
Multiple R-squared: 0.4594, Adjusted R-squared: 0.4567
F-statistic: 168.7 on 2 and 397 DF, p-value: < 2.2e-16


# requires broom and glue packages
broom::augment(mod) %>%
ggplot(aes(x=.fitted, y=y)) +
geom_point(alpha=.25, color='#ff5500') +
geom_smooth(se=F, color='#00aaff') +
annotate('text',
label = glue::glue("Rsq = {round(summary(mod)$r.squared, 2)}"),
x = 4,
y = 16) +
labs(title='Fitted vs. Observed') +
theme_minimal()


Heteroscedasticity, non-normality, etc.
modlog = lm(log(y) ~ x1 + x2, dat)
summary(modlog)
plot(modlog, which = 1:2)
broom::augment(mod) %>%
ggplot(aes(x=.fitted, y=y)) +
geom_point(alpha=.25, color='#ff5500') +
geom_smooth(se=F, color='#00aaff') +
annotate('text',
label = glue::glue("Rsq = {round(summary(modlog)$r.squared, 2)}"),
x = 4,
y = 16) +
labs(title='Fitted vs. Observed') +
theme_minimal()
Polynomial Regression
set.seed(123)
x = runif(500)
mu = sin(2 * (4 * x - 2)) + 2 * exp(-(16 ^ 2) * ((x - .5) ^ 2))
y = rnorm(500, mu, .3)
d = data.frame(x,y)
Polynomial regression is problematic
A standard linear regression is definitely not going to capture this relationship. As above, we could try and use polynomial regression here, e.g. fitting a quadratic or cubic function within the standard regression framework. However, this is unrealistic at best and at worst isn’t useful for complex relationships. In the following, even with a polynomial of degree 15 the fit is fairly poor in many areas, and ‘wiggles’ in some places where there doesn’t appear to be a need to.
LS0tCnRpdGxlOiAiVGhlIENhc2UgZm9yIEdBTXMiCm91dHB1dDogaHRtbF9ub3RlYm9vawplZGl0b3Jfb3B0aW9uczogCiAgY2h1bmtfb3V0cHV0X3R5cGU6IGlubGluZQotLS0KCgojIyBVc2luZyBTdGFuZGFyZCBNZXRob2RzCgpgYGB7ciBzaW11bGF0ZWRfZGF0YX0Kc2V0LnNlZWQoMTIzKQpsaWJyYXJ5KG1nY3YpCmRhdCA9IGdhbVNpbSgxLCBuPTQwMCwgZGlzdD0ibm9ybWFsIiwgc2NhbGU9MSwgdmVyYm9zZT1GKQpgYGAKCmBgYHtyIGRlbW9sbX0KbW9kID0gbG0oeSB+IHgxICsgeDIsIGRhdGE9ZGF0KQpzdW1tYXJ5KG1vZCkKYGBgCmBgYHtyIGRpYWdub3N0aWNzfQpwbG90KG1vZCwgd2hpY2ggPSAxOjIpCmBgYAoKYGBge3IgZ2VuZXJhbF9maXR9CiMgcmVxdWlyZXMgYnJvb20gYW5kIGdsdWUgcGFja2FnZXMKYnJvb206OmF1Z21lbnQobW9kKSAlPiUgCiAgZ2dwbG90KGFlcyh4PS5maXR0ZWQsIHk9eSkpICsKICBnZW9tX3BvaW50KGFscGhhPS4yNSwgY29sb3I9JyNmZjU1MDAnKSArIAogIGdlb21fc21vb3RoKHNlPUYsIGNvbG9yPScjMDBhYWZmJykgKwogIGFubm90YXRlKCd0ZXh0JywKICAgICAgICAgICBsYWJlbCA9IGdsdWU6OmdsdWUoIlJzcSA9IHtyb3VuZChzdW1tYXJ5KG1vZCkkci5zcXVhcmVkLCAyKX0iKSwKICAgICAgICAgICB4ID0gNCwKICAgICAgICAgICB5ID0gMTYpICsKICBsYWJzKHRpdGxlPSdGaXR0ZWQgdnMuIE9ic2VydmVkJykgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCmBgYHtyIHJlbGF0aW9uc2hpcHN9CmRhdCAlPiUgCiAgc2VsZWN0KHgxLCB4MiwgeSkgJT4lIAogIGdhdGhlcihrZXk9dmFyaWFibGUsIHZhbHVlPVByZWRpY3RvciwgLXkpICU+JSAKICBnZ3Bsb3QoYWVzKHg9UHJlZGljdG9yLCB5PXkpKSArCiAgZ2VvbV9wb2ludChhbHBoYT0uMjUsIGNvbG9yPScjZmY1NTAwJykgKyAKICBnZW9tX3Ntb290aChhZXMoKSwgY29sb3I9JyMwMGFhZmYnLCBzZT1GKSArCiAgZmFjZXRfZ3JpZCh+dmFyaWFibGUpICsgCiAgbGFicyh0aXRsZT0nUHJlZGljdG9ycyB2cy4gWScpICsKICB0aGVtZV9taW5pbWFsKCkgKwogIHRoZW1lKGxlZ2VuZC50aXRsZT1lbGVtZW50X2JsYW5rKCksCiAgICAgICAgbGVnZW5kLmJhY2tncm91bmQ9ZWxlbWVudF9ibGFuaygpLAogICAgICAgIGxlZ2VuZC5rZXk9ZWxlbWVudF9ibGFuaygpKQpgYGAKCiMjIEhldGVyb3NjZWRhc3RpY2l0eSwgbm9uLW5vcm1hbGl0eSwgZXRjLgoKYGBge3IgbG9nX25vX2hlbHB9Cm1vZGxvZyA9IGxtKGxvZyh5KSB+IHgxICsgeDIsIGRhdCkKc3VtbWFyeShtb2Rsb2cpCnBsb3QobW9kbG9nLCB3aGljaCA9IDE6MikKCmJyb29tOjphdWdtZW50KG1vZCkgJT4lIAogIGdncGxvdChhZXMoeD0uZml0dGVkLCB5PXkpKSArCiAgZ2VvbV9wb2ludChhbHBoYT0uMjUsIGNvbG9yPScjZmY1NTAwJykgKyAKICBnZW9tX3Ntb290aChzZT1GLCBjb2xvcj0nIzAwYWFmZicpICsKICBhbm5vdGF0ZSgndGV4dCcsCiAgICAgICAgICAgbGFiZWwgPSBnbHVlOjpnbHVlKCJSc3EgPSB7cm91bmQoc3VtbWFyeShtb2Rsb2cpJHIuc3F1YXJlZCwgMil9IiksCiAgICAgICAgICAgeCA9IDQsCiAgICAgICAgICAgeSA9IDE2KSArCiAgbGFicyh0aXRsZT0nRml0dGVkIHZzLiBPYnNlcnZlZCcpICsKICB0aGVtZV9taW5pbWFsKCkKYGBgCgoKIyMgUG9seW5vbWlhbCBSZWdyZXNzaW9uCgpgYGB7ciBzaW1EYXRhfQpzZXQuc2VlZCgxMjMpCnggPSBydW5pZig1MDApCm11ID0gc2luKDIgKiAoNCAqIHggLSAyKSkgKyAyICogZXhwKC0oMTYgXiAyKSAqICgoeCAtIC41KSBeIDIpKQp5ID0gcm5vcm0oNTAwLCBtdSwgLjMpCmQgPSBkYXRhLmZyYW1lKHgseSkgCmBgYAoKCgpgYGB7ciBzaW1EYXRhUGxvdCwgZWNobz1GfQpsaWJyYXJ5KHBsb3RseSkgICMgaW5zdGFsbCBpZiB5b3UgZG9uJ3QgaGF2ZQoKZml0cyA9IHNhcHBseShzZXEoMywxNSwgMyksIGZ1bmN0aW9uKHApIGZpdHRlZChsbSh5fnBvbHkoeCxwKSkpKSAlPiUgCiAgZGF0YS5mcmFtZSh4LCB5LCAuKSAlPiUgCiAgZ2F0aGVyKGtleT1wb2x5bm9taWFsLCB2YWx1ZT1maXRzLCAteCwgLXkpICU+JSAKICBtdXRhdGUocG9seW5vbWlhbCA9IGZhY3Rvcihwb2x5bm9taWFsLCBsYWJlbHM9c2VxKDMsMTUsIDMpKSkKCnBsb3RfbHkoZGF0YT1kKSAlPiUgCiAgYWRkX21hcmtlcnMofngsIH55LCBtYXJrZXI9bGlzdChjb2xvcj0nI2ZmNTUwMCcsIG9wYWNpdHk9LjIpLCBzaG93bGVnZW5kPUYpICU+JSAKICBhZGRfbGluZXMofngsIH5maXRzLCBjb2xvcj1+cG9seW5vbWlhbCwgZGF0YT1maXRzKSAlPiUgCiAgY29uZmlnKGRpc3BsYXlNb2RlQmFyPUYpICU+JSAKICBsYXlvdXQoCiAgICB4YXhpcyA9IGxpc3QoemVyb2xpbmU9RkFMU0UsCiAgICAgICAgICAgICAgICAgc2hvd2dyaWQ9RkFMU0UpLAogICAgeWF4aXMgPSBsaXN0KHplcm9saW5lPUZBTFNFLAogICAgICAgICAgICAgICAgIHNob3dncmlkPUZBTFNFKQogICkKYGBgCgoKCiMjIyMgUG9seW5vbWlhbCByZWdyZXNzaW9uIGlzIHByb2JsZW1hdGljCgpBIHN0YW5kYXJkIGxpbmVhciByZWdyZXNzaW9uIGlzIGRlZmluaXRlbHkgbm90IGdvaW5nIHRvIGNhcHR1cmUgdGhpcyByZWxhdGlvbnNoaXAuICBBcyBhYm92ZSwgd2UgY291bGQgdHJ5IGFuZCB1c2UgcG9seW5vbWlhbCByZWdyZXNzaW9uIGhlcmUsIGUuZy4gZml0dGluZyBhIHF1YWRyYXRpYyBvciBjdWJpYyBmdW5jdGlvbiB3aXRoaW4gdGhlIHN0YW5kYXJkIHJlZ3Jlc3Npb24gZnJhbWV3b3JrLiAgSG93ZXZlciwgdGhpcyBpcyB1bnJlYWxpc3RpYyBhdCBiZXN0IGFuZCBhdCB3b3JzdCBpc24ndCB1c2VmdWwgZm9yIGNvbXBsZXggcmVsYXRpb25zaGlwcy4gSW4gdGhlIGZvbGxvd2luZywgZXZlbiB3aXRoIGEgcG9seW5vbWlhbCBvZiBkZWdyZWUgMTUgdGhlIGZpdCBpcyBmYWlybHkgcG9vciBpbiBtYW55IGFyZWFzLCBhbmQgJ3dpZ2dsZXMnIGluIHNvbWUgcGxhY2VzIHdoZXJlIHRoZXJlIGRvZXNuJ3QgYXBwZWFyIHRvIGJlIGEgbmVlZCB0by4KCmBgYHtyIHBvbHlyZWcsIGVjaG89RkFMU0V9CgoKCgpgYGAKCg==